 1.电商分析之--核心交易之DIM层建表加载数据
   
   首先要确定哪些是事实表、哪些是维表。绿色的是事实表，灰色的维表
   用什么方式处理维表，每日快照、拉链表？
   小表使用每日快照：产品分类表、商家店铺表、商家地域组织表、支付方式表
   大表使用拉链表：产品信息表
   
   1).产品分类表
   数据库中的数据是规范的（满足三范式），但是规范化的数据给查询带来不便。
   备注：这里对商品分类维度表做了逆规范化
   省略了无关信息，做成了宽表
   DROP TABLE IF EXISTS dim.dim_trade_product_cat;
create table if not exists dim.dim_trade_product_cat(
firstId int,      -- 一级商品分类id
firstName string, -- 一级商品分类名称
secondId int,     -- 二级商品分类Id
secondName string, -- 二级商品分类名称
thirdId int,       -- 三级商品分类id
thirdName string   -- 三级商品分类名称
)
partitioned by (dt string)
STORED AS PARQUET;
   
   实现：
   select T1.catid, T1.catname, T2.catid, T2.catname, T3.catid,
T3.catname
from (select catid, catname, parentid
from ods.ods_trade_product_category
where level=3 and dt='2020-07-01') T3

left join

(select catid, catname, parentid
from ods.ods_trade_product_category
where level=2 and dt='2020-07-01') T2
on T3.parentid=T2.catid

left join
(select catid, catname, parentid
from ods.ods_trade_product_category
where level=1 and dt='2020-07-01') T1
on T2.parentid=T1.catid;
   
   数据加载：
   /data/lagoudw/script/trade/dim_load_product_cat.sh

#！/bin/bash

source /etc/profile
if [ -n "$1" ]
then
  do_date=$1
else
  do_date=`date -d "-1 day" +%F`
fi
sql="
insert overwrite table dim.dim_trade_product_cat
partition(dt='$do_date')
select
t1.catid, -- 一级分类id
t1.catname, -- 一级分类名称
t2.catid, -- 二级分类id
t2.catname, -- 二级分类名称
t3.catid, -- 三级分类id
t3.catname -- 三级分类名称
from
-- 商品三级分类数据
(select catid, catname, parentid
from ods.ods_trade_product_category
where level=3 and dt='$do_date') t3 

left join
-- 商品二级分类数据
(select catid, catname, parentid
from ods.ods_trade_product_category
where level=2 and dt='$do_date') t2
on t3.parentid = t2.catid

left join
-- 商品一级分类数据
(select catid, catname, parentid
from ods.ods_trade_product_category
where level=1 and dt='$do_date') t1
on t2.parentid = t1.catid;
"

hive -e "$sql"
   
   2).商品地域组织表
   商家店铺表、商家地域组织表 => 一张维表
   这里也是逆规范化的设计，将商家店铺表、商家地域组织表组织成一张表，并拉宽。
   在一行数据中体现：商家信息、城市信息、地域信息。信息中包括 id 和 name ；
   
   drop table if exists dim.dim_trade_shops_org;
create table dim.dim_trade_shops_org(
shopid int,
shopName string,
cityId int,
cityName string ,
regionId int ,
regionName string
)
partitioned by (dt string)
STORED AS PARQUET;
   
   实现
   select T1.shopid, T1.shopname, T2.id cityid, T2.orgname
cityname, T3.id regionid, T3.orgname regionname
from
(select shopid, shopname, areaid
from ods.ods_trade_shops
where dt='2020-07-01') T1
left join
(select id, parentid, orgname, orglevel
from ods.ods_trade_shop_admin_org
where orglevel=2 and dt='2020-07-01') T2
on T1.areaid=T2.id
left join
(select id, orgname, orglevel
from ods.ods_trade_shop_admin_org
where orglevel=1 and dt='2020-07-01') T3
on T2.parentid=T3.id
limit 10;
   
   /data/lagoudw/script/trade/dim_load_shop_org.sh

#！/bin/bash

source /etc/profile
if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
insert overwrite table dim.dim_trade_shops_org
partition(dt='$do_date')
select t1.shopid,
t1.shopname,
t2.id as cityid,
t2.orgname as cityName,
t3.id as region_id,
t3.orgname as region_name
from (select shopId, shopName, areaId
from ods.ods_trade_shops
where dt='$do_date') t1 
left join
(select id, parentId, orgname, orglevel
from ods.ods_trade_shop_admin_org
where orglevel=2 and dt='$do_date') t2 
on t1.areaid = t2.id
left join
(select id, parentId, orgname, orglevel
from ods.ods_trade_shop_admin_org
where orglevel=1 and dt='$do_date') t3 
on t2.parentid = t3.id;
"

hive -e "$sql"
   3).支付方式表
   drop table if exists dim.dim_trade_payment;
create table if not exists dim.dim_trade_payment(
paymentId string, -- 支付方式id
paymentName string -- 支付方式名称
)
partitioned by (dt string)
STORED AS PARQUET;
   
   /data/lagoudw/script/trade/dim_load_payment.sh

#！/bin/bash

source /etc/profile
if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
insert overwrite table dim.dim_trade_payment
partition(dt='$do_date')
select id, payName
from ods.ods_trade_payments
where dt='$do_date';
"
hive -e "$sql"
   4).商品信息表
   使用拉链表对商品信息进行处理。
   (1).历史数据 => 初始化拉链表(开始日期：当日；结束日期：9999-12-31)【只执行一次】
   (2).拉链表的每日处理【每次加载数据时处理】
       新增数据。每日新增数据(ODS) => 开始日期：当日；结束日期：9999-12-31
	   历史数据。拉链表(DIM) 与 每日新增数据(ODS) 做左连接
	       连接上数据。数据有变化，结束日期：当日；
           未连接上数据。数据无变化，结束日期保持不变；
   (1).创建维表
   拉链表要增加两列，分别记录生效日期和失效日期
   drop table if exists dim.dim_trade_product_info;
create table dim.dim_trade_product_info(
`productId` bigint,
`productName` string,
`shopId` string,
`price` decimal,
`isSale` tinyint,
`status` tinyint,
`categoryId` string,
`createTime` string,
`modifyTime` string,
`start_dt` string,
`end_dt` string
) COMMENT '产品表'
STORED AS PARQUET;
    
   (2).初始数据加载（历史数据加载，只做一次）
   insert overwrite table dim.dim_trade_product_info
select productId,
productName,
shopId,
price,
isSale,
status,
categoryId,
createTime,
modifyTime,
-- modifyTime非空取modifyTime，否则取createTime；substr取日期
case when modifyTime is not null
then substr(modifyTime, 0, 10)
else substr(createTime, 0, 10)
end as start_dt,
'9999-12-31' as end_dt
from ods.ods_trade_product_info
where dt = '2020-07-12';

   (3).增量数据导入（重复执行，每次加载数据执行）
   /data/lagoudw/script/trade/dim_load_product_info.sh	
   
#！/bin/bash
source /etc/profile
if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
insert overwrite table dim.dim_trade_product_info
select productId,
productName,
shopId,
price,
isSale,
status,
categoryId,
createTime,
modifyTime,
case when modifyTime is not null
then substr(modifyTime,0,10)
else substr(createTime,0,10)
end as start_dt,
'9999-12-31' as end_dt
from ods.ods_trade_product_info
where dt='$do_date'

union all

select dim.productId,
dim.productName,
dim.shopId,
dim.price,
dim.isSale,
dim.status,
dim.categoryId,
dim.createTime,
dim.modifyTime,
dim.start_dt,
case when dim.end_dt >= '9999-12-31' and ods.productId
is not null
then '$do_date'
else dim.end_dt
end as end_dt
from dim.dim_trade_product_info dim left join
(select *
from ods.ods_trade_product_info
where dt='$do_date' ) ods
on dim.productId = ods.productId
"

hive -e "$sql"
	
   
   
   